library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lubridate)
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
library(fpp)
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
library(vars)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: strucchange
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## 
## The following object is masked from 'package:stringr':
## 
##     boundary
## 
## Loading required package: urca
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## 
## The following object is masked from 'package:readr':
## 
##     spec
## 
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## 
## The following object is masked from 'package:utils':
## 
##     tar

Combining and Preparing Data: North

icecaps_N <- data.frame()

# Loop through the 12 CSV files
for (i in 1:12) {
  if (i < 10){
    df <- read.csv(paste0("N_0", i, "_extent_v3.0.csv"))
  }
  if (i >= 10){
    df <- read.csv(paste0("N_", i, "_extent_v3.0.csv"))
  }
  icecaps_N <- rbind(icecaps_N, df)
}

# Combine the year and month columns into a new "date" column and convert to a year-month format
icecaps_N <- icecaps_N %>%
  mutate(date = ym(paste(year, str_pad(mo, 2, pad = "0"), sep = "-"))) %>%
  mutate(date = as.yearmon(date)) %>%
  dplyr::select(date, extent, area) %>% # Select only the date and extent columns
  arrange(date) %>% # Sort by the date column
  mutate(extent = replace(extent, extent == -9999.00, NA), # Replace -9999.00 with NA in the extent column
         area = replace(area, area == -9999.00, NA)) # Replace -9999.00 with NA in the area column

head(icecaps_N)
##       date extent  area
## 1 Nov 1978  11.65  9.04
## 2 Dec 1978  13.67 10.90
## 3 Jan 1979  15.41 12.41
## 4 Feb 1979  16.18 13.18
## 5 Mar 1979  16.34 13.21
## 6 Apr 1979  15.45 12.53

Combining and Preparing Data: South

icecaps_S <- data.frame()

# Loop through the 12 CSV files
for (i in 1:12) {
  if (i < 10){
    df <- read.csv(paste0("S_0", i, "_extent_v3.0.csv"))
  }
  if (i >= 10){
    df <- read.csv(paste0("S_", i, "_extent_v3.0.csv"))
  }
  icecaps_S <- rbind(icecaps_S, df)
}

# Combine the year and month columns into a new "date" column and convert to a year-month format
icecaps_S <- icecaps_S %>%
  mutate(date = ym(paste(year, str_pad(mo, 2, pad = "0"), sep = "-"))) %>%
  mutate(date = as.yearmon(date)) %>%
  dplyr::select(date, extent, area) %>% # Select only the date and extent columns
  arrange(date) %>% # Sort by the date column
  mutate(extent = replace(extent, extent == -9999.00, NA), # Replace -9999.00 with NA in the extent column
         area = replace(area, area == -9999.00, NA)) # Replace -9999.00 with NA in the area column


head(icecaps_S)
##       date extent  area
## 1 Nov 1978  15.90 11.69
## 2 Dec 1978  10.40  6.97
## 3 Jan 1979   5.40  3.47
## 4 Feb 1979   3.14  2.11
## 5 Mar 1979   4.00  2.66
## 6 Apr 1979   7.49  5.45

Handling Missing Data for North and South

# Function that imputates missing data by replacing missing values with the average of all values of the same month

fill_missing <- function(df) {
  for (i in 1:nrow(df)){
    if (is.na(df[i, "extent"])){ # if there is a missing value in extent column
      my_sum <- 0
      month_count <- 0
      df_month <- month(df[i,"date"]) # store the month of missing value
      for (j in 1:nrow(df)){ 
          if (i != j){
            # take the average of extents of all same months
            if (month(df[j,"date"]) == df_month){ 
              my_sum <- my_sum + (df[j,"extent"])
              month_count <- month_count + 1
            }
          }
      }
      # replace missing value with calculated average
      df[i, "extent"] <- (my_sum/month_count)
    }
    
    if (is.na(df[i, "area"])){ # if there is a missing value in area column
      my_sum <- 0
      month_count <- 0
      df_month <- month(df[i,"date"]) # store the month of missing value
      for (j in 1:nrow(df)){ 
          if (i != j){
            # take the average of areas of all same months
            if (month(df[j,"date"]) == df_month){ 
              my_sum <- my_sum + (df[j,"area"])
              month_count <- month_count + 1
            }
          }
      }
      # replace missing value with calculated average
      df[i, "area"] <- (my_sum/month_count)
    }
  }
  df
  }
icecaps_N <- fill_missing(icecaps_N)
icecaps_S <- fill_missing(icecaps_S)

Plotting the Data

ts_extent_N <- ts(icecaps_N$extent, start=c(1979,1), frequency=12)
ts_area_N <- ts(icecaps_N$area, start=c(1979,1), frequency=12)


autoplot(ts_extent_N, xlab = "Year", ylab = "Extent", main = "Original Time Series Extent (North)")

autoplot(ts_area_N, xlab = "Year", ylab = "Area", main = "Original Time Series Area (North)")

ts_extent_S <- ts(icecaps_S$extent, start=c(1979,1), frequency=12)
ts_area_S <- ts(icecaps_S$area, start=c(1979,1), frequency=12)

autoplot(ts_extent_S, xlab = "Year", ylab = "Extent", main = "Original Time Series Extent (South)")

autoplot(ts_area_S, xlab = "Year", ylab = "Area", main = "Original Time Series Area (South)")

Getting train and test data

train_extent_N <- window(ts_extent_N, start = c(1990, 1), end = c(2019, 12))
test_extent_N <- window(ts_extent_N, start = c(2020, 1))
train_area_N <- window(ts_area_N, start = c(1990, 1), end = c(2019, 12))
test_area_N <- window(ts_area_N, start = c(2020, 1))
#exponential smoothning
fit_add <- decompose(train_extent_N, type="additive")
plot(fit_add)

fit_mult <- decompose(train_extent_N, type="multiplicative")
plot(fit_mult)

fit_add <- decompose(train_area_N, type="additive")
plot(fit_add)

fit_mult <- decompose(train_area_N, type="multiplicative")
plot(fit_mult)

#holt winters additive
fit_hw_add <- hw(train_extent_N, h=length(test_extent_N), seasonal="add")
summary(fit_hw_add)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = train_extent_N, h = length(test_extent_N), seasonal = "add") 
## 
##   Smoothing parameters:
##     alpha = 0.8553 
##     beta  = 0.0244 
##     gamma = 0.1446 
## 
##   Initial states:
##     l = 12.003 
##     b = -0.0651 
##     s = -3.115 -5.2471 -4.5906 -2.5271 -0.2114 1.33
##            2.9469 3.8459 3.8495 2.9995 1.4686 -0.7493
## 
##   sigma:  0.2988
## 
##      AIC     AICc      BIC 
## 1266.877 1268.667 1332.941 
## 
## Error measures:
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 0.004451767 0.2920815 0.2137866 -0.3050211 2.274539 0.6206018
##                  ACF1
## Training set 0.192518
## 
## Forecasts:
##          Point Forecast       Lo 80     Hi 80        Lo 95     Hi 95
## Jan 2020       8.919274  8.53635046  9.302198  8.333642910  9.504905
## Feb 2020      11.076380 10.56637746 11.586382 10.296398538 11.856361
## Mar 2020      12.482069 11.86567314 13.098464 11.539373061 13.424764
## Apr 2020      13.222261 12.51069392 13.933828 12.134013089 14.310509
## May 2020      13.411099 12.61145596 14.210742 12.188150553 14.634047
## Jun 2020      12.679263 11.79641434 13.562111 11.329062705 14.029462
## Jul 2020      11.249293 10.28675575 12.211830  9.777219286 12.721367
## Aug 2020       9.436336  8.39673332 10.475939  7.846400877 11.026271
## Sep 2020       6.582164  5.46749742  7.696831  4.877428416  8.286900
## Oct 2020       4.155141  2.96695840  5.343323  2.337972663  5.972309
## Nov 2020       3.493723  2.23323425  4.754212  1.565971631  5.421475
## Dec 2020       5.417641  4.08579155  6.749490  3.380753104  7.454529
## Jan 2021       8.606855  7.18606732 10.027642  6.433947808 10.779762
## Feb 2021      10.763961  9.27398818 12.253933  8.485244442 13.042677
## Mar 2021      12.169649 10.61085048 13.728448  9.785672163 14.553627
## Apr 2021      12.909842 11.28247555 14.537208 10.420999942 15.398683
## May 2021      13.098680 11.40292293 14.794436 10.505243523 15.692116
## Jun 2021      12.366843 10.60280277 14.130884  9.668976093 15.064711
## Jul 2021      10.936874  9.10459679 12.769151  8.134647928 13.739100
## Aug 2021       9.123917  7.22339987 11.024434  6.217326974 12.030507
## Sep 2021       6.269745  4.30094094  8.238549  3.258718987  9.280771
## Oct 2021       3.842722  1.80554497  5.879898  0.727128851  6.958314
## Nov 2021       3.181304  1.07563697  5.286971 -0.039035888  6.401644
## Dec 2021       5.105222  2.93091686  7.279526  1.779909439  8.430534
## Jan 2022       8.294436  6.03707679 10.551795  4.842103173 11.746768
## Feb 2022      10.451541  8.12560055 12.777482  6.894321872 14.008761
## Mar 2022      11.857230  9.46247032 14.251990  8.194761012 15.519699
## Apr 2022      12.597422 10.13359088 15.061254  8.829317224 16.365528
## May 2022      12.786260 10.25309087 15.319430  8.912111895 16.660409
## Jun 2022      12.054424  9.45163814 14.657210  8.073806470 16.035042
## Jul 2022      10.624455  7.95176303 13.297146  6.536925577 14.711984
## Aug 2022       8.811498  6.06860166 11.554394  4.616600273 13.006395
## Sep 2022       5.957326  3.14391826  8.770733  1.654590245 10.260061
## Oct 2022       3.530302  0.64606810  6.414536 -0.880753250  7.941358
## Nov 2022       2.868885 -0.08649759  5.824267 -1.650982577  7.388752
## Dec 2022       4.792802  1.76594389  7.819661  0.163621773  9.421983
## Jan 2023       7.982016  4.87103010 11.093003  3.224173402 12.739859
## Feb 2023      10.139122  6.95626788 13.321976  5.271366637 15.006878
## Mar 2023      11.544811  8.28973559 14.799886  6.566602765 16.523019
## Apr 2023      12.285003  8.95735028 15.612656  7.195797208 17.374209
## May 2023      12.473841  9.07325151 15.874431  7.273088046 17.674594
## Jun 2023      11.742005  8.26811684 15.215893  6.429151585 17.054858
## Jul 2023      10.312035  6.76448530 13.859585  4.886525731 15.737545
## Aug 2023       8.499078  4.87750079 12.120656  2.960353427 14.037803
## Sep 2023       5.644907  1.94893449  9.340879 -0.007594972 11.297408
## Oct 2023       3.217883 -0.55285203  6.988618 -2.548958613  8.984725
## Nov 2023       2.556466 -1.28940190  6.402333 -3.325281212  8.438213
autoplot(fit_hw_add) +
  autolayer(test_extent_N, series = "Actual") +
  xlab("Time") +
  ylab("Extent") +
  ggtitle("Holt-Winters Forecast and Actual Values") +
  guides(color = guide_legend(title = "Series"))

#Red- Actual 
# blue line predicted
# blue shade confidence interval
#holt winters additive damping
fit_hw_add_damp <- hw(train_extent_N, h=length(test_extent_N), seasonal="add",
           damped=TRUE)
summary(fit_hw_add_damp)
## 
## Forecast method: Damped Holt-Winters' additive method
## 
## Model Information:
## Damped Holt-Winters' additive method 
## 
## Call:
##  hw(y = train_extent_N, h = length(test_extent_N), seasonal = "add",  
## 
##  Call:
##      damped = TRUE) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0059 
##     gamma = 1e-04 
##     phi   = 0.9645 
## 
##   Initial states:
##     l = 11.4974 
##     b = 0.0245 
##     s = -3.4534 -5.408 -4.6346 -2.3524 0.1138 1.7213
##            3.1181 3.8539 3.6908 2.8529 1.3501 -0.8525
## 
##   sigma:  0.2875
## 
##      AIC     AICc      BIC 
## 1240.114 1242.120 1310.064 
## 
## Error measures:
##                       ME      RMSE       MAE        MPE    MAPE      MASE
## Training set -0.00758449 0.2806433 0.2026014 -0.2644521 2.16215 0.5881322
##                   ACF1
## Training set 0.1013728
## 
## Forecasts:
##          Point Forecast      Lo 80     Hi 80      Lo 95     Hi 95
## Jan 2020       8.324067  7.9556034  8.692531  7.7605505  8.887584
## Feb 2020      10.520270  9.9977183 11.042821  9.7210964 11.319443
## Mar 2020      12.016694 11.3749254 12.658463 11.0351936 12.998194
## Apr 2020      12.848605 12.1055428 13.591667 11.7121895 13.985020
## May 2020      13.005724 12.1727550 13.838693 11.7318078 14.279641
## Jun 2020      12.264377 11.3495479 13.179207 10.8652665 13.663488
## Jul 2020      10.862066  9.8714479 11.852684  9.3470464 12.377085
## Aug 2020       9.249157  8.1875401 10.310773  7.6255542 10.872759
## Sep 2020       6.777849  5.6491381  7.906561  5.0516344  8.504065
## Oct 2020       4.490675  3.2981303  5.683220  2.6668351  6.314516
## Nov 2020       3.712565  2.4589643  4.966165  1.7953482  5.629781
## Dec 2020       5.662783  4.3505314  6.975034  3.6558675  7.669698
## Jan 2021       8.259228  6.8904242  9.628033  6.1658230 10.352634
## Feb 2021      10.457732  9.0342557 11.881208  8.2807130 12.634751
## Mar 2021      11.956376 10.4799013 13.432850  9.6983030 14.214448
## Apr 2021      12.790427 11.2624650 14.318389 10.4536107 15.127244
## May 2021      12.949611 11.3715341 14.527688 10.5361506 15.363072
## Jun 2021      12.210256 10.5833194 13.837192  9.7220713 14.698440
## Jul 2021      10.809865  9.1352242 12.484506  8.2487230 13.371007
## Aug 2021       9.198808  7.4775306 10.920086  6.5663412 11.831276
## Sep 2021       6.729288  4.9623644  8.496212  4.0270115  9.431565
## Oct 2021       4.443837  2.6321916  6.255483  1.6731643  7.214510
## Nov 2021       3.667389  1.8118856  5.522892  0.8296414  6.505136
## Dec 2021       5.619210  3.7206605  7.517760  2.7156290  8.522791
## Jan 2022       8.217202  6.2763624 10.158042  5.2489438 11.185461
## Feb 2022      10.417197  8.4347964 12.399598  7.3853768 13.449018
## Mar 2022      11.917280  9.8940005 13.940559  8.8229413 15.011618
## Apr 2022      12.752718 10.6892090 14.816228  9.5968530 15.908584
## May 2022      12.913241 10.8101168 15.016365  9.6967902 16.129692
## Jun 2022      12.175176 10.0330243 14.317328  8.8990377 15.451315
## Jul 2022      10.776030  8.5954110 12.956650  7.4410608 14.111000
## Aug 2022       9.166174  6.9476230 11.384726  5.7731928 12.559156
## Sep 2022       6.697812  4.4418418  8.953783  3.2476032 10.148021
## Oct 2022       4.413479  2.1205810  6.706376  0.9067944  7.920163
## Nov 2022       3.638108  1.3087555  5.967460  0.0756710  7.200544
## Dec 2022       5.590968  3.2256159  7.956320  1.9734741  9.208462
## Jan 2023       8.189963  5.7890414 10.590884  4.5180705 11.861854
## Feb 2023      10.390924  7.9548626 12.826986  6.6652895 14.116559
## Mar 2023      11.891939  9.4211441 14.362734  8.1131844 15.670693
## Apr 2023      12.728277 10.2231425 15.233412  8.8970043 16.559550
## May 2023      12.889667 10.3505728 15.428761  9.0064575 16.772876
## Jun 2023      12.152439  9.5797534 14.725124  8.2178559 16.087022
## Jul 2023      10.754100  8.1481799 13.360020  6.7686891 14.739511
## Aug 2023       9.145022  6.5062133 11.783831  5.1093121 13.180733
## Sep 2023       6.677411  4.0060484  9.348773  2.5919145 10.762907
## Oct 2023       4.393801  1.6902110  7.097392  0.2590167  8.528586
## Nov 2023       3.619129  0.8836271  6.354630 -0.5644600  7.802717
autoplot(fit_hw_add_damp) +
  autolayer(test_extent_N, series = "Actual") +
  xlab("Time") +
  ylab("Extent") +
  ggtitle("Holt-Winters Forecast and Actual Values") +
  guides(color = guide_legend(title = "Series"))

# Comparing RMSE Train
print(accuracy(fit_hw_add)[2])
## [1] 0.2920815
print(accuracy(fit_hw_add_damp)[2])
## [1] 0.2806433
#
checkresiduals(fit_hw_add_damp)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' additive method
## Q* = 114.26, df = 24, p-value = 1.008e-13
## 
## Model df: 0.   Total lags used: 24
accuracy_hw_add_damp <- accuracy(fit_hw_add_damp, test_extent_N)
accuracy_hw_add_damp
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.00758449 0.2806433 0.2026014 -0.2644521  2.16215 0.5881322
## Test set      1.40202303 1.4790112 1.4161840 13.7836164 14.04930 4.1110446
##                   ACF1 Theil's U
## Training set 0.1013728        NA
## Test set     0.7192263 0.6200398

Auto Arima

arima_model <- auto.arima(train_extent_N, seasonal=TRUE) 

arima_model
## Series: train_extent_N 
## ARIMA(2,0,0)(2,1,2)[12] with drift 
## 
## Coefficients:
##          ar1      ar2     sar1    sar2     sma1     sma2    drift
##       0.8705  -0.1789  -0.6624  0.1020  -0.1334  -0.5246  -0.0051
## s.e.  0.0539   0.0538   0.2918  0.0973   0.2916   0.2024   0.0009
## 
## sigma^2 = 0.06315:  log likelihood = -15.89
## AIC=47.78   AICc=48.2   BIC=78.59
checkresiduals(arima_model$residuals)

## 
##  Ljung-Box test
## 
## data:  Residuals
## Q* = 22.408, df = 24, p-value = 0.5549
## 
## Model df: 0.   Total lags used: 24
arima_forecast <- forecast(arima_model, h = length(test_extent_N))

autoplot(arima_forecast) +
  autolayer(test_extent_N, series = "Actual") +
  xlab("Time") +
  ylab("Extent") +
  ggtitle("Forecast and Actual Values") +
  guides(color = guide_legend(title = "Series"))

plot(arima_forecast$mean,  xlab = "Months", ylab = "Extent", col = "red", ylim=c(0,20))
lines(test_extent_N, col = "blue")
legend("bottomleft", legend = c("Extent Forecast", "Original Time Series"), col = c("red", "blue"), lty = c(1,1))

accuracy(arima_forecast, test_extent_N)
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 0.006692764 0.2445760 0.1808994 -0.1175827 1.982333 0.5251335
## Test set     0.419576368 0.5518514 0.5065326  3.9381654 5.523027 1.4704150
##                     ACF1 Theil's U
## Training set 0.005160017        NA
## Test set     0.761791620 0.2848214